/*
	add_durable_column.do
	

		//this code adds a durable column to the data. Durables are identified in the
		//following way:
		//1. All purchases smaller than the threshold (currently 600$, i.e. amount >= -600) are considered non-durables
		//2. For each purchase (negative transaction) above the threshold, I identify how
		//   often the transaction recurs based on userid and description. The idea
		//   is that maybe the mortgage payments are slightly different based on the
		//   number of days of the month (I saw such examples), but the description
		//   is always the same.
		//3. I create a dummy called mask_durable_bydesc that takes value 1 if a purchase with
		//   a certain description recurs at most 6 times in the dataset (again, the
		//   idea is that if I buy three big TVs in the sample at least one will have
		//   a different transaction description; maybe Amazon formats it inconsistently).
		//4. Repeat 2 and 3 using userid and amount to calculate the frequency (here the
		//   purchase must be the only one with that amount for the user). This
		//   new variable is called mask_durable_byamount.
		//5. Create a conservative mask, called mask_durable_combined, which takes value 1
		//   if both masks before have value 1.
*/



***********************************************
* Start of filter
***********************************************


// Cutoff on the signed amount: purchases are negative transactions, so a
// "large" purchase is one with amount below -600.
local threshold = -600

// Lower-cased copy of the description, used further down for
// case-insensitive keyword matching with strpos().
generate lower_desc = lower(genericXXXXdescription)

// Category filter: 1 when the inferred category id is missing, is one of the
// listed ids, or is above 5005; 0 otherwise.
generate flag_tobeconsidered = missing(inferredcategoryid) ///
	| inlist(inferredcategoryid, 20, 21, 2001, 2002, 2101, 2102) ///
	| inferredcategoryid > 5005
								   


// Description-based durable mask.
//
// A row is eligible when its amount is below the threshold, the category flag
// is set, and its lower-cased description contains none of the keywords
// "savings", "transfer", "vacation", "travel". Eligible rows receive the size
// of their userid-by-description group; the mask is 1 when that size does not
// exceed the cutoff below, and 0 for every other row.
//
// NOTE(review): _N is the size of the whole userid-description group, so rows
// in the group that fail the eligibility filter are counted too -- confirm
// this is the intended notion of "recurrence".
// NOTE(review): rows with an empty description are grouped together per user.
local max_count_bydesc = 6

// Scratch counter kept in a tempvar so it cannot collide with an existing
// variable in the dataset. Ineligible rows stay missing, and missing compares
// greater than any number, so they never satisfy the cutoff test.
tempvar n_same_desc
generate `n_same_desc' = .
bysort userid genericXXXXdescription: replace `n_same_desc' = _N ///
	if amount < `threshold' ///
	 & flag_tobeconsidered == 1 ///
	 & !strpos(lower_desc, "savings") ///
	 & !strpos(lower_desc, "transfer") ///
	 & !strpos(lower_desc, "vacation") ///
	 & !strpos(lower_desc, "travel")

generate mask_durable_bydesc = (`n_same_desc' <= `max_count_bydesc')
lab var  mask_durable_bydesc "indicator of large durables, freq based on description"
drop `n_same_desc'


// Amount-based durable mask.
//
// Same eligibility filter as the description-based mask (amount below the
// threshold, category flag set, none of the keywords "savings", "transfer",
// "vacation", "travel" in the lower-cased description), but the group is
// userid-by-amount. The mask is 1 when the group size does not exceed the
// cutoff below, i.e. the user has no other transaction with exactly the same
// amount, and 0 for every other row.
//
// NOTE(review): _N is the size of the whole userid-amount group, so rows with
// the same amount that fail the category/keyword filter are counted too --
// confirm this is intended.
local max_count_byamount = 1

// Scratch counter kept in a tempvar so it cannot collide with an existing
// variable in the dataset. Ineligible rows stay missing, and missing compares
// greater than any number, so they never satisfy the cutoff test.
tempvar n_same_amount
generate `n_same_amount' = .
bysort userid amount: replace `n_same_amount' = _N ///
	if amount < `threshold' ///
	 & flag_tobeconsidered == 1 ///
	 & !strpos(lower_desc, "savings") ///
	 & !strpos(lower_desc, "transfer") ///
	 & !strpos(lower_desc, "vacation") ///
	 & !strpos(lower_desc, "travel")

generate mask_durable_byamount = (`n_same_amount' <= `max_count_byamount')
lab var  mask_durable_byamount "indicator of large durables, freq based on amount"
drop `n_same_amount'


// Conservative indicator: 1 only where both the description-based and the
// amount-based masks are 1, otherwise 0.
generate mask_durable_combined = (mask_durable_bydesc == 1 & mask_durable_byamount == 1)
label variable mask_durable_combined "indicator of large durables, combined"

// Remove the helper variables created at the start of the filter.
drop lower_desc
drop flag_tobeconsidered
